Setup

#Importing libraries for Shiny dashboard and documentation
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rlang)
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
library(roxygen2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(here)
## here() starts at /Users/kaylaemerson/Desktop/ENV710_Statistics/FinalProject/ENV710_FinalProject
library(moments)
library(dunn.test)
library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(gt)

Load and Clean Data (Sameer)

#Load dataset from an RDS file named "optdata.rds"
settlement_data <- read_csv("Settlement_Data_with_updated_demographics.csv")
## New names:
## Rows: 411 Columns: 16
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): MonitoringYear, SettlementName, MPAName, TimePoint dbl (12): ...1,
## SettlementID, MPAID, Treatment, MAIndex, MTIndex, FSIndex, P...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
head(settlement_data)
#Rename columns
settlement_data <- settlement_data %>%
                  rename(`Household_Material_Assets` = MAIndex, `Household_Marine_Tenure` = MTIndex, 
                         `Food_Security_Index` = FSIndex, `Place_Attachment_Index` = PAIndex,
                          `School_Enrollment_Rate` = SERate)
head(settlement_data)
#Round values to 2 decimal place
settlement_data <- settlement_data %>% 
                  mutate(`Household_Material_Assets` = round(`Household_Material_Assets`, 2)) %>%
                  mutate(`Household_Marine_Tenure` = round(`Household_Marine_Tenure`, 2)) %>%
                  mutate(`Food_Security_Index` = round(`Food_Security_Index`, 2)) %>%
                  mutate(`Place_Attachment_Index` = round(`Place_Attachment_Index`, 2)) %>%
                  mutate(`School_Enrollment_Rate` = round(`School_Enrollment_Rate`, 2)) %>%
                  mutate(`YrResident` = round(`YrResident`, 2)) %>%
                  mutate(`Age`= round(`Age`, 2)) %>%
                  mutate(`Gender` = round(`Gender`, 2))
#Look at summary
summary(settlement_data)
##       ...1        SettlementID    MonitoringYear     SettlementName    
##  Min.   :  1.0   Min.   :  1.00   Length:411         Length:411        
##  1st Qu.:103.5   1st Qu.: 28.00   Class :character   Class :character  
##  Median :206.0   Median : 54.00   Mode  :character   Mode  :character  
##  Mean   :206.0   Mean   : 54.65                                        
##  3rd Qu.:308.5   3rd Qu.: 80.00                                        
##  Max.   :411.0   Max.   :115.00                                        
##      MPAID         MPAName            Treatment       TimePoint        
##  Min.   :1.000   Length:411         Min.   :0.0000   Length:411        
##  1st Qu.:2.000   Class :character   1st Qu.:0.0000   Class :character  
##  Median :3.000   Mode  :character   Median :1.0000   Mode  :character  
##  Mean   :3.416                      Mean   :0.6886                     
##  3rd Qu.:5.000                      3rd Qu.:1.0000                     
##  Max.   :6.000                      Max.   :1.0000                     
##  Household_Material_Assets Household_Marine_Tenure Food_Security_Index
##  Min.   :0.1500            Min.   :1.000           Min.   :0.420      
##  1st Qu.:0.4300            1st Qu.:1.900           1st Qu.:3.075      
##  Median :0.4900            Median :2.340           Median :3.480      
##  Mean   :0.4821            Mean   :2.401           Mean   :3.406      
##  3rd Qu.:0.5400            3rd Qu.:2.875           3rd Qu.:3.820      
##  Max.   :0.7400            Max.   :4.250           Max.   :4.850      
##  Place_Attachment_Index School_Enrollment_Rate   YrResident         Age       
##  Min.   :3.430          Min.   :0.1200         Min.   : 1.53   Min.   :28.40  
##  1st Qu.:3.900          1st Qu.:0.7400         1st Qu.:24.24   1st Qu.:41.84  
##  Median :4.000          Median :0.8300         Median :30.30   Median :44.82  
##  Mean   :4.015          Mean   :0.8123         Mean   :29.06   Mean   :44.69  
##  3rd Qu.:4.090          3rd Qu.:0.9100         3rd Qu.:35.38   3rd Qu.:47.65  
##  Max.   :4.990          Max.   :1.0000         Max.   :46.67   Max.   :59.80  
##      Gender      
##  Min.   :0.6000  
##  1st Qu.:0.8800  
##  Median :0.9200  
##  Mean   :0.9144  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
#Remove NAs
settlement_data <- na.omit(settlement_data)

Figure 1 (Kayla)

# Graph 1
# visualize the distribution of the food security index observations by MPA 
histogram_1 <- ggplot(settlement_data, aes(x = Place_Attachment_Index,
                                           fill = MPAName)) +
  geom_histogram(bins = 8) +  # follow rice's rule to get 6ish
  labs(x = "Place Attachment Index", 
       y = "Settlement Observation Count",
       title = "Place Attachment Index Distributions within Six MPAs in Papua, Indonesia") +
  facet_wrap(~MPAName, nrow = 2)+
  theme_bw() +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5)) 

# view histogram 
histogram_1

# save histogram to files 
ggsave("BarfieldAEmersonKSwarupSFig1.jpg", plot = histogram_1)
## Saving 7 x 5 in image

Figure 2 (Andrew)

# Graph 2
# A scatterplot depicting the relationship between the food security 
# and place attachment indices
fs_pa_scatterplot <- ggplot(settlement_data, aes(x = YrResident,
                            y = Place_Attachment_Index)) +
  geom_point(aes(colour = MPAName)) +
  geom_smooth(method = "lm", color = "black") +
  facet_grid(~MPAName) +
  labs(x = "Household Years of Residency",
       y = "Place Attachment Index",
       color = "MPA Name") +
  ggtitle("Relationship Between Household Years of Residency 
and MPA Place Attachment within Papua, Indonesian") +
  theme_bw() +
  theme(legend.position = "none",
        legend.background = element_rect(color = "black"),
        strip.text.x = element_text(size = 7),
        plot.title = element_text(hjust = 0.5))

# View scatterplot
fs_pa_scatterplot
## `geom_smooth()` using formula = 'y ~ x'
A series of scatterplots depicting the relationship between food security and place attachment within specific marine protected area (MPA) regions in Indonesia. Both indices are scaled 1-5.

A series of scatterplots depicting the relationship between food security and place attachment within specific marine protected area (MPA) regions in Indonesia. Both indices are scaled 1-5.

# save scatterplot to files 
ggsave("BarfieldAEmersonKSwarupSFig2.jpg", plot = fs_pa_scatterplot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'

Figure 3 (Sameer)

#Household Marine Tenure is defined as the average number of rights a household
# has over marine resources in their MPA
#Potential for this to be a strong predictor of Place Attachment Index

#Draw a scatterplot with a regression line to examine correlation
mt_pa_scatterplot <- ggplot(settlement_data, aes(x = Household_Marine_Tenure, 
                                                 y = Place_Attachment_Index)) +
  geom_point() +
  geom_smooth(method = "lm", col = "red") +
  labs(x = "Household Marine Tenure",
       y = "Place Attachment Index") +
  ggtitle("Relationship of Place Attachment to Bird's Head Seascape MPAs against 
          Household Rights over Marine Resources")

mt_pa_scatterplot
## `geom_smooth()` using formula = 'y ~ x'
A scatterplot depicting the relationship between household marine tenure and place attachment over all marine protected areas (MPAs) in Bird's Head Seascape, Indonesia. Both indices are scaled 1-5.

A scatterplot depicting the relationship between household marine tenure and place attachment over all marine protected areas (MPAs) in Bird’s Head Seascape, Indonesia. Both indices are scaled 1-5.

# save scatterplot to files 
ggsave("BarfieldAEmersonKSwarupSFig3.jpg", plot = mt_pa_scatterplot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'

Figure 4 (Sameer)

#Homoscedastic distribution of Place Attachment Index values when plotted against 
# Household Marine Tenure in Figure 3

#Will examine scatterplot of Place Attachment Index against Household Marine 
#Tenure for the various MPAs
mt_pa_all_mpas_scatterplot <- ggplot(settlement_data, aes(x = Household_Marine_Tenure,
                            y = Place_Attachment_Index)) +
  geom_point(aes(colour = MPAName)) +
  geom_smooth(method = "lm", color = "black") +
  facet_grid(~MPAName) +
  labs(x = "Household Marine Tenure",
       y = "Place Attachment Index",
       color = "MPA Name") +
  ggtitle("Relationship Between Household Marine Tenure and Place Attachment to
          MPAs in Bird's Head Seascape, Indonesia") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        strip.text.x = element_text(size = 7),
        plot.title = element_text(hjust = 0.5))

#View scatterplot
mt_pa_all_mpas_scatterplot
## `geom_smooth()` using formula = 'y ~ x'
A series of scatterplots depicting the relationship between household marine tenure and place attachment within specific marine protected area (MPA) regions in Indonesia. Both indices are scaled 1-5.

A series of scatterplots depicting the relationship between household marine tenure and place attachment within specific marine protected area (MPA) regions in Indonesia. Both indices are scaled 1-5.

# save scatterplot to files 
ggsave("BarfieldAEmersonKSwarupSFig4.jpg", plot = mt_pa_all_mpas_scatterplot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'

Figure 5 (Sameer)

#Household Material Assets is defined as the economic well-being of a household
#Potential for this to be a strong predictor of Place Attachment Index

#Draw a scatterplot with a regression line to examine correlation
ma_pa_scatterplot <- ggplot(settlement_data, aes(x = Household_Material_Assets, y = Place_Attachment_Index)) +
  geom_point() +
  geom_smooth(method = "lm", col = "red") +
  labs(x = "Household Material Assets",
       y = "Place Attachment Index") +
  ggtitle("Relationship of Place Attachment to Bird's Head Seascape MPAs against 
          Household Material Assets")

ma_pa_scatterplot
## `geom_smooth()` using formula = 'y ~ x'
A scatterplot depicting the relationship between household material assets and place attachment over all marine protected areas (MPAs) in Bird's Head Seascape, Indonesia.

A scatterplot depicting the relationship between household material assets and place attachment over all marine protected areas (MPAs) in Bird’s Head Seascape, Indonesia.

# save scatterplot to files 
ggsave("BarfieldAEmersonKSwarupSFig5.jpg", plot = ma_pa_scatterplot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'

Figure 6 (Sameer)

#Homoscedastic distribution of Place Attachment Index values when plotted against 
# Household Material Assets in Figure 5

#Will examine scatterplot of Place Attachment Index against Household Material Assets for the various MPAs
ma_pa_all_mpas_scatterplot <- ggplot(settlement_data, aes(x = Household_Material_Assets,
                            y = Place_Attachment_Index)) +
  geom_point(aes(colour = MPAName)) +
  geom_smooth(method = "lm", color = "black") +
  facet_grid(~MPAName) +
  labs(x = "Household Material Assets",
       y = "Place Attachment Index",
       color = "MPA Name") +
  ggtitle("Relationship Between Household Material Assets and Place Attachment 
          to MPAs in Bird's Head Seascape, Indonesia") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        strip.text.x = element_text(size = 7))

#View scatterplot
ma_pa_all_mpas_scatterplot
## `geom_smooth()` using formula = 'y ~ x'
A series of scatterplots depicting the relationship between household material assets and place attachment within specific marine protected area (MPA) regions in Indonesia.

A series of scatterplots depicting the relationship between household material assets and place attachment within specific marine protected area (MPA) regions in Indonesia.

# save scatterplot to files 
ggsave("BarfieldAEmersonKSwarupSFig6.jpg", plot = ma_pa_all_mpas_scatterplot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'

Multiple Linear Regression Model (Andrew)

# Define Research Question:
# Is there a relationship between place attachment index and gender, 
# age, and years of residency in Indonesian households?

# Examine data and possible correlations
# Raw Counts
ggplot(data = settlement_data, aes(x = Place_Attachment_Index)) +
  geom_bar() +
  labs(y = "Count") +
  theme_bw()

ggplot(data = settlement_data, aes(x = Age)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = settlement_data, aes(x = Gender)) +
  geom_bar() +
  labs(y = "Count") +
  theme_bw()

ggplot(data = settlement_data, aes(x = YrResident)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Possible Correlations
ggplot(data = settlement_data, aes(x = Age, y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = Gender, y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = YrResident, y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

# Multi-collinearity tests
cor.test(settlement_data$Age, settlement_data$Gender)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Age and settlement_data$Gender
## t = -6.1839, df = 409, p-value = 1.517e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3784339 -0.2013749
## sample estimates:
##        cor 
## -0.2924084
cor.test(settlement_data$Age, settlement_data$YrResident)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Age and settlement_data$YrResident
## t = 9.553, df = 409, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3446207 0.5030580
## sample estimates:
##       cor 
## 0.4271122
cor.test(settlement_data$Gender, settlement_data$YrResident)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Gender and settlement_data$YrResident
## t = -1.795, df = 409, p-value = 0.07339
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.183570869  0.008389558
## sample estimates:
##         cor 
## -0.08841148
# Fit regression model
Indonesia_individual_model <- lm(Place_Attachment_Index ~ YrResident +
                                   Age + Gender, data = settlement_data)


summary(Indonesia_individual_model)
## 
## Call:
## lm(formula = Place_Attachment_Index ~ YrResident + Age + Gender, 
##     data = settlement_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58177 -0.11080 -0.01866  0.08451  1.00455 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.1429070  0.1954449  21.197   <2e-16 ***
## YrResident   0.0020024  0.0013464   1.487    0.138    
## Age          0.0004525  0.0028256   0.160    0.873    
## Gender      -0.2260258  0.1389061  -1.627    0.104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2094 on 407 degrees of freedom
## Multiple R-squared:  0.01592,    Adjusted R-squared:  0.008666 
## F-statistic: 2.195 on 3 and 407 DF,  p-value: 0.08812
# AIC Value of linear model
AIC(Indonesia_individual_model)
## [1] -112.9781
# Evaluate Model Diagnostics
plot(Indonesia_individual_model)

# Communicate Results
# The results of our multiple linear regression reveal that age, gender, and
# years of residency are not significant predictors of place attachment in 
# Indonesia (F(410) = 2.195, adjusted R^2 = 0.009, p = 0.09). 

Kruskal Wallis and Dunn’s Test 1 (Andrew)

# Null Hypothesis: Place attachment between all MPAs are the same.

# Alternative Hypothesis: Place attachment between at least two 
# MPAs are different. 

# Examining Data Distributions
ggplot(settlement_data, aes(x = Place_Attachment_Index,
                            fill = MPAName)) +
  geom_histogram() +
  facet_grid(.~MPAName) +
  labs(x = "Place Attachment Index",
       y = "Count",
       color = "MPA Name") +
  theme_bw() +
  theme(legend.position = "bottom",
        strip.text.x = element_text(size = 7))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Use Q-Q Plots, skewness, and kurtosis to test for normality
ggplot(settlement_data, aes(sample = Place_Attachment_Index)) +
  geom_qq() +
  geom_qq_line() +
  labs (x = "Theoretical Normal Distribution",
        y = "Raw Place Attachment Index Values")

skewness(settlement_data$Place_Attachment_Index)
## [1] 1.254365
kurtosis(settlement_data$Place_Attachment_Index)
## [1] 6.929917
# Non-normal data, so we will use Kruskal-Wallis and Dunn's post-hoc tests
Indonesia_MPA_kruskal_test <- kruskal.test(Place_Attachment_Index ~ MPAName, 
                                                  data = settlement_data)

Indonesia_MPA_kruskal_test
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Place_Attachment_Index by MPAName
## Kruskal-Wallis chi-squared = 16.183, df = 5, p-value = 0.00634
# Results of Kruskal Test are significant, so let's run a Dunn's Test
Indonesia_MPA_dunn_test <- dunn.test(settlement_data$Place_Attachment_Index, settlement_data$MPAName)
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 16.1834, df = 5, p-value = 0.01
## 
## 
##                            Comparison of x by group                            
##                                 (No adjustment)                                
## Col Mean-|
## Row Mean |   Kaimana    Kofiau d   Misool S   Selat Da   Teluk Ce
## ---------+-------------------------------------------------------
## Kofiau d |  -2.688998
##          |    0.0036*
##          |
## Misool S |  -2.118790   1.056939
##          |    0.0171*     0.1453
##          |
## Selat Da |  -0.143330   2.586212   1.987140
##          |     0.4430    0.0049*    0.0235*
##          |
## Teluk Ce |  -2.768019   0.772198  -0.469646  -2.631757
##          |    0.0028*     0.2200     0.3193    0.0042*
##          |
## Teluk Ma |  -2.193081   0.768487  -0.278202  -2.072756   0.113658
##          |    0.0142*     0.2211     0.3904    0.0191*     0.4548
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

Kruskal Wallis and Dunn’s Test 2 (Andrew)

# Null Hypothesis: Food security between all MPAs are the same.

# Alternative Hypothesis: Food security between at least two 
# MPAs are different. 

# Examining Data Distributions
ggplot(settlement_data, aes(x = Food_Security_Index,
                            fill = MPAName)) +
  geom_histogram() +
  facet_grid(.~MPAName) +
  labs(x = "Food Security Index",
       y = "Count",
       color = "MPA Name") +
  theme_bw() +
  theme(legend.position = "bottom",
        strip.text.x = element_text(size = 7))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Use Q-Q Plots, skewness, and kurtosis to test for normality
ggplot(settlement_data, aes(sample = Food_Security_Index)) +
  geom_qq() +
  geom_qq_line() +
  labs (x = "Theoretical Normal Distribution",
        y = "Raw Food Security Index Values")

skewness(settlement_data$Food_Security_Index)
## [1] -0.8122661
kurtosis(settlement_data$Food_Security_Index)
## [1] 4.571809
# Non-normal data, so we will use Kruskal-Wallis and Dunn's post-hoc tests
Indonesia_MPA_kruskal_test2 <- kruskal.test(Food_Security_Index ~ MPAName, 
                                                  data = settlement_data)

Indonesia_MPA_kruskal_test2
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Food_Security_Index by MPAName
## Kruskal-Wallis chi-squared = 13.316, df = 5, p-value = 0.02059
# Results of Kruskal Test are significant, so let's run a Dunn's Test
Indonesia_MPA_dunn_test2 <- dunn.test(settlement_data$Food_Security_Index, settlement_data$MPAName)
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 13.3157, df = 5, p-value = 0.02
## 
## 
##                            Comparison of x by group                            
##                                 (No adjustment)                                
## Col Mean-|
## Row Mean |   Kaimana    Kofiau d   Misool S   Selat Da   Teluk Ce
## ---------+-------------------------------------------------------
## Kofiau d |   1.934056
##          |     0.0266
##          |
## Misool S |   1.843798  -0.503765
##          |     0.0326     0.3072
##          |
## Selat Da |  -0.962296  -2.723294  -2.858532
##          |     0.1680    0.0032*    0.0021*
##          |
## Teluk Ce |   0.404722  -1.783939  -1.696291   1.505559
##          |     0.3428     0.0372     0.0449     0.0661
##          |
## Teluk Ma |   1.331516  -0.756317  -0.354337   2.246937   1.114744
##          |     0.0915     0.2247     0.3615    0.0123*     0.1325
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

Multi-level Regression Model (Andrew)

# Make MPA names into a factor with 6 levels
settlement_data2 <- settlement_data %>% 
  mutate(MPAName = factor(MPAName, levels = c("Kaimana MPA",
                                              "Kofiau dan Pulau Boo MPA",
                                              "Misool Selatan Timur MPA",
                                              "Selat Dampier MPA",
                                              "Teluk Cenderawasih NP",
                                              "Teluk Mayalibit MPA")))

# Define Research Question:
# Do marine tenure, food security index, years of residency, and age
# significantly predict place attachment index in Indonesian households?

# Examine data and possible correlations
# Raw counts
ggplot(data = settlement_data2, aes(x = Household_Marine_Tenure)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = settlement_data2, aes(x = Food_Security_Index)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = settlement_data2, aes(x = YrResident)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = settlement_data2, aes(x = Age)) +
  geom_histogram() +
  labs(y = "Count") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = settlement_data2, aes(x = Place_Attachment_Index)) +
  geom_bar() +
  labs(y = "Count") +
  theme_bw()

# Possible Correlations
ggplot(data = settlement_data2, aes(x = Household_Marine_Tenure, 
                                    y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = Food_Security_Index, 
                                   y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = YrResident, 
                                   y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = Age, y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

# Multi-collinearity tests
cor.test(settlement_data2$Household_Marine_Tenure,
         settlement_data2$Food_Security_Index)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$Household_Marine_Tenure and settlement_data2$Food_Security_Index
## t = -5.8, df = 409, p-value = 1.329e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3627357 -0.1838526
## sample estimates:
##        cor 
## -0.2756793
cor.test(settlement_data2$Household_Marine_Tenure,
         settlement_data2$YrResident)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$Household_Marine_Tenure and settlement_data2$YrResident
## t = 2.036, df = 409, p-value = 0.04239
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.003473309 0.195008737
## sample estimates:
##       cor 
## 0.1001689
cor.test(settlement_data2$Household_Marine_Tenure,
         settlement_data2$Age)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$Household_Marine_Tenure and settlement_data2$Age
## t = -3.0222, df = 409, p-value = 0.002667
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.24107960 -0.05180823
## sample estimates:
##        cor 
## -0.1477969
cor.test(settlement_data2$Food_Security_Index,
         settlement_data2$YrResident)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$Food_Security_Index and settlement_data2$YrResident
## t = 1.5794, df = 409, p-value = 0.115
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0190111  0.1732856
## sample estimates:
##        cor 
## 0.07786137
cor.test(settlement_data2$Food_Security_Index,
         settlement_data2$Age)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$Food_Security_Index and settlement_data2$Age
## t = 5.4314, df = 409, p-value = 9.616e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1668286 0.3473861
## sample estimates:
##       cor 
## 0.2593723
cor.test(settlement_data2$YrResident,
         settlement_data2$Age)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data2$YrResident and settlement_data2$Age
## t = 9.553, df = 409, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3446207 0.5030580
## sample estimates:
##       cor 
## 0.4271122
# Fit linear regression model without random effect and evaluate model
# diagnostics
place_attachment_lm <- lm(Place_Attachment_Index ~ 
                            Household_Marine_Tenure +
                            Food_Security_Index +
                            YrResident +
                            Age, data = settlement_data2)

summary(place_attachment_lm)
## 
## Call:
## lm(formula = Place_Attachment_Index ~ Household_Marine_Tenure + 
##     Food_Security_Index + YrResident + Age, data = settlement_data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57484 -0.11324 -0.00842  0.08892  0.96095 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.719496   0.132809  28.006  < 2e-16 ***
## Household_Marine_Tenure  0.051603   0.017048   3.027  0.00263 ** 
## Food_Security_Index     -0.012266   0.018334  -0.669  0.50383    
## YrResident               0.001102   0.001356   0.813  0.41664    
## Age                      0.004051   0.002806   1.443  0.14969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2074 on 406 degrees of freedom
## Multiple R-squared:  0.03631,    Adjusted R-squared:  0.02682 
## F-statistic: 3.825 on 4 and 406 DF,  p-value: 0.00459
plot(place_attachment_lm)

# AIC Value of standard linear model
AIC(place_attachment_lm)
## [1] -119.5846
# Fit linear regression model with random effect and evaluate model
# diagnostics

# Re-fit standard linear regression
place_attachment_lm2 <- gls(Place_Attachment_Index ~ 
                            Household_Marine_Tenure +
                            Food_Security_Index +
                            YrResident +
                            Age, data = settlement_data2)
# Fit multi-level model with MPA name as random effect
place_attachment_rlm <- lme(Place_Attachment_Index ~ 
                            Household_Marine_Tenure +
                            Food_Security_Index +
                            YrResident +
                            Age, random = ~ 1 | MPAName, data = settlement_data2, 
                            na.action=na.exclude)

# Compare AIC Values
AIC(place_attachment_lm2, place_attachment_rlm)
## Warning in AIC.default(place_attachment_lm2, place_attachment_rlm): models are
## not all fitted to the same number of observations
# Plot residuals of multi-level model
plot(place_attachment_rlm)

qqnorm(place_attachment_rlm)

# Model summary
summary(place_attachment_rlm)
## Linear mixed-effects model fit by REML
##   Data: settlement_data2 
##        AIC       BIC   logLik
##   -52.2051 -24.73531 33.10255
## 
## Random effects:
##  Formula: ~1 | MPAName
##         (Intercept)  Residual
## StdDev:  0.05852237 0.2094833
## 
## Fixed effects:  Place_Attachment_Index ~ Household_Marine_Tenure + Food_Security_Index +      YrResident + Age 
##                             Value  Std.Error  DF   t-value p-value
## (Intercept)              3.771316 0.15066850 370 25.030557  0.0000
## Household_Marine_Tenure  0.036404 0.02025540 370  1.797269  0.0731
## Food_Security_Index     -0.016311 0.01978587 370 -0.824391  0.4102
## YrResident               0.002316 0.00151550 370  1.528322  0.1273
## Age                      0.003297 0.00300653 370  1.096582  0.2735
##  Correlation: 
##                         (Intr) Hs_M_T Fd_S_I YrRsdn
## Household_Marine_Tenure -0.556                     
## Food_Security_Index     -0.378  0.264              
## YrResident               0.139 -0.134  0.015       
## Age                     -0.737  0.174 -0.181 -0.439
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.61399176 -0.63189980 -0.05838216  0.45724255  4.20992865 
## 
## Number of Observations: 379
## Number of Groups: 5

Logistic Regression Model (Sameer)

#Sameer's Logistics Regression Model for determining Place Attachment

#Research Question: Do socioeconomic indicators (Household Material Assets, Household Marine Tenure, Food Security Index and School Enrollment Rate predict binary place attachment to MPAs (Attached/Not Attached)?)
# Possible relationships between covariates and response variable
ggplot(data = settlement_data2, aes(x = Household_Material_Assets, 
                                    y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data2, aes(x = Household_Marine_Tenure, 
                                    y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = Food_Security_Index, 
                                   y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = School_Enrollment_Rate, 
                                   y = Place_Attachment_Index)) +
  geom_point() +
  theme_bw()

#Only School Enrollment Rate shows some relationship with PLace Attachment - slightly higher density of points with a >4.0 place attachment when School Enrollment Rate increases. All other variables show a random distribution of points with respect to Place Attachment
# Look at correlation between covariates and response variable
cor.test(settlement_data$Place_Attachment_Index, settlement_data$Household_Material_Assets)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$Household_Material_Assets
## t = -1.2861, df = 409, p-value = 0.1991
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.15921755  0.03346906
## sample estimates:
##        cor 
## -0.0634657
# -0.06
cor.test(settlement_data$Place_Attachment_Index, settlement_data$Household_Marine_Tenure)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$Household_Marine_Tenure
## t = 3.2469, df = 409, p-value = 0.001263
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06275387 0.25139540
## sample estimates:
##       cor 
## 0.1585209
# 0.159
cor.test(settlement_data$Place_Attachment_Index, settlement_data$School_Enrollment_Rate)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$School_Enrollment_Rate
## t = -2.7473, df = 409, p-value = 0.006275
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.22836305 -0.03837756
## sample estimates:
##        cor 
## -0.1346072
# -0.135
cor.test(settlement_data$Place_Attachment_Index, settlement_data$Food_Security_Index)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$Food_Security_Index
## t = -1.0821, df = 409, p-value = 0.2798
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.14938718  0.04352427
## sample estimates:
##         cor 
## -0.05342996
# -0.053

# Multi-collinearity tests
cor.test(settlement_data$Household_Material_Assets, settlement_data$Household_Marine_Tenure)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Material_Assets and settlement_data$Household_Marine_Tenure
## t = -1.2265, df = 409, p-value = 0.2207
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1563505  0.0364057
## sample estimates:
##         cor 
## -0.06053677
# correlation of the two variable is statistically non-significant, p-value = 0.221, cor = -0.06
cor.test(settlement_data$Household_Material_Assets, settlement_data$School_Enrollment_Rate)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Material_Assets and settlement_data$School_Enrollment_Rate
## t = 4.1855, df = 409, p-value = 3.486e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1080535 0.2936375
## sample estimates:
##       cor 
## 0.2026645
# correlation of the two variable is statistically significant, p-value = 3.49e-05, cor = 0.203
cor.test(settlement_data$Household_Material_Assets, settlement_data$Food_Security_Index)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Material_Assets and settlement_data$Food_Security_Index
## t = 3.5123, df = 409, p-value = 0.0004938
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0756311 0.2634770
## sample estimates:
##       cor 
## 0.1711086
# correlation of the two variable is statistically significant, p-value = 0.0005, cor = 0.171
cor.test(settlement_data$Household_Marine_Tenure, settlement_data$School_Enrollment_Rate)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Marine_Tenure and settlement_data$School_Enrollment_Rate
## t = -3.3954, df = 409, p-value = 0.0007523
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.25816849 -0.06996526
## sample estimates:
##       cor 
## -0.165574
# correlation of the two variable is statistically significant, p-value = 0.0008, cor = -0.166
cor.test(settlement_data$Household_Marine_Tenure, settlement_data$Food_Security_Index)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Marine_Tenure and settlement_data$Food_Security_Index
## t = -5.8, df = 409, p-value = 1.329e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3627357 -0.1838526
## sample estimates:
##        cor 
## -0.2756793
# correlation of the two variable is statistically significant, p-value = 1.33e-08, cor = -0.276
cor.test(settlement_data$Food_Security_Index, settlement_data$School_Enrollment_Rate)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Food_Security_Index and settlement_data$School_Enrollment_Rate
## t = 2.6431, df = 409, p-value = 0.008531
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03327849 0.22351807
## sample estimates:
##       cor 
## 0.1295906
# correlation of the two variable is statistically significant, p-value = 0.009, cor = 0.130

#Statistical significance of correlations between multiple covariate pairs. However, all the correlations are weak (under 0.3) so we proceed with using all the covariates
#Define a function for Rice's Rule
rice_rule <- function(n) {
  return(2 * (n^(1/3)))
}
num_bins <- ceiling(rice_rule(length(settlement_data$Place_Attachment_Index)))

#Look at distribution of place attachment values
ggplot(settlement_data, aes(x = Place_Attachment_Index)) +
      geom_histogram(bins = num_bins) +
      labs(x = "Place Attachment", y = "Frequency", title = "Distribution of Place Attachment values amongst Eastern Indonesian communities") +
      theme_bw()

#Distribution is approximately normal with a slight right-skew
mean_PIA <- mean(settlement_data$Place_Attachment_Index)

#As an initial pass, convert Place_Attachment_Index into a binary categorical variable where values below mean are 0 and values above mean are 1. This categorization can be further explored and tweaked
settlement_data <- settlement_data %>% 
                    mutate(PIA_Binary = ifelse(Place_Attachment_Index > mean_PIA, 1, 0))
#Build first logistic regression model with PIA_Binary as response variable
PIA_glm <- glm(PIA_Binary ~  Household_Material_Assets + Household_Marine_Tenure + Food_Security_Index + School_Enrollment_Rate, data = settlement_data)
# Communicate Results
# The results of our logistic regression reveal that Household Material Assets is a significant predictor (p < 0.05) of the binary place attachment variable.
# AIC: 595.94

# household material assets p = 0.035
# household marine tenure p = 0.224
#food security index p = 0.123
# school enrollment rate p  = 0.507

#As an initial pass, Household Material Assets is a significant predictor (p < 0.05) of PIA_Binary
#Interesting as Household Material Assets did not show a strong correlation with overall Place Attachment but could be due to binary PIA values.
summary(PIA_glm)
## 
## Call:
## glm(formula = PIA_Binary ~ Household_Material_Assets + Household_Marine_Tenure + 
##     Food_Security_Index + School_Enrollment_Rate, data = settlement_data)
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                0.46552    0.26301   1.770   0.0775 .
## Household_Material_Assets -0.52772    0.24952  -2.115   0.0350 *
## Household_Marine_Tenure    0.04904    0.04030   1.217   0.2243  
## Food_Security_Index        0.06689    0.04324   1.547   0.1227  
## School_Enrollment_Rate    -0.13420    0.20218  -0.664   0.5072  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2454008)
## 
##     Null deviance: 101.625  on 410  degrees of freedom
## Residual deviance:  99.633  on 406  degrees of freedom
## AIC: 595.94
## 
## Number of Fisher Scoring iterations: 2
#No significant outliers from Cook's distance plot
#Normal Q-Q plot shows residuals do not follow a normal distribution with very heavy tails on lower and upper end
#Residuals also do not show a random distribution and homoscedasticity
#Given the AIC value and the residual plots, we will not proceed with this model
plot(PIA_glm)

Multi-level model with household variables (Sameer)

#Research Question: Do socioeconomic indicators (Household Material Assets, Household Marine Tenure, Food Security Index and School Enrollment Rate predict place attachment to MPAs, when accounting for individual variation of place attachment within MPAs?
# Fit multi-level model with random intercept by MPA as a sensitivity analysis
place_attachment_household_mlm <- lme(Place_Attachment_Index ~ 
                            Household_Marine_Tenure +
                            Household_Material_Assets +
                            School_Enrollment_Rate +
                            Food_Security_Index, random = ~ 1 | MPAName, data = settlement_data2, na.action = na.exclude)
summary(place_attachment_household_mlm)
## Linear mixed-effects model fit by REML
##   Data: settlement_data2 
##         AIC      BIC   logLik
##   -68.69349 -41.2237 41.34674
## 
## Random effects:
##  Formula: ~1 | MPAName
##         (Intercept)  Residual
## StdDev:   0.0589693 0.2092381
## 
## Fixed effects:  Place_Attachment_Index ~ Household_Marine_Tenure + Household_Material_Assets +      School_Enrollment_Rate + Food_Security_Index 
##                               Value  Std.Error  DF  t-value p-value
## (Intercept)                4.198641 0.13190308 370 31.83125  0.0000
## Household_Marine_Tenure    0.024336 0.02037826 370  1.19422  0.2332
## Household_Material_Assets -0.136370 0.11518354 370 -1.18394  0.2372
## School_Enrollment_Rate    -0.194838 0.09097394 370 -2.14169  0.0329
## Food_Security_Index       -0.004839 0.01945609 370 -0.24871  0.8037
##  Correlation: 
##                           (Intr) Hs_M_T Hs_M_A Sc_E_R
## Household_Marine_Tenure   -0.644                     
## Household_Material_Assets -0.368  0.158              
## School_Enrollment_Rate    -0.499  0.121 -0.169       
## Food_Security_Index       -0.553  0.276 -0.041 -0.066
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.82829277 -0.66655643 -0.04793978  0.47614800  4.20203149 
## 
## Number of Observations: 379
## Number of Groups: 5
# Communicate Results
# The results of our multi-level regression reveal that School Enrollment Rate is a significant predictor (p < 0.05) of place attachment
# AIC: -68.69
#This checks out with our observations of the scatterplots

# household material assets p = 0.237
# household marine tenure p = 0.233
#food security index p = 0.804
# school enrollment rate p  = 0.033
#Residuals show mostly homoscedasticity 
plot(place_attachment_household_mlm)

#Residuals do not meet normality assumption - heavier tails on both ends
qqnorm(place_attachment_household_mlm)

# Compare AIC Values with logistic regression model - lower AIC value for multi-level model
#Better residual plots and lower AIC value means we choose multi-level model over logistic regression model
AIC(PIA_glm, place_attachment_household_mlm)
## Warning in AIC.default(PIA_glm, place_attachment_household_mlm): models are not
## all fitted to the same number of observations

Linear Regression Model (Kayla)

# Define Research Question:
# Is there a relationship between place attachment index and household material assets, 
# household marine tenure, and school enrollment rate?

# Examine data and possible correlations
# Raw Counts
ggplot(data = settlement_data2, aes(x = `Place_Attachment_Index`)) +
  geom_histogram(bins = 15) +
  theme_bw()

# pretty normally distributed 

ggplot(data = settlement_data2, aes(x = `Household_Material_Assets`)) +
  geom_histogram(bins = 15) +
  theme_bw()

# pretty normally distributed 

ggplot(data = settlement_data2, aes(x = `Household_Marine_Tenure`)) +
  geom_histogram(bins = 15) +
  theme_bw()

# not normally distributed

ggplot(data = settlement_data, aes(x = `School_Enrollment_Rate`)) +
  geom_histogram(bins = 15) +
  theme_bw()

# pretty skewed

# Possible Correlations
ggplot(data = settlement_data, aes(x = `Household_Material_Assets`, y = `Place_Attachment_Index`)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = `Household_Marine_Tenure`, y = `Place_Attachment_Index`)) +
  geom_point() +
  theme_bw()

ggplot(data = settlement_data, aes(x = `School_Enrollment_Rate`, y = `Place_Attachment_Index`)) +
  geom_point() +
  theme_bw()

#unclear if any of those have any sort of correlation

# Numerically view correlation 
cor.test(settlement_data$Place_Attachment_Index, settlement_data$`Household_Material_Assets`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$Household_Material_Assets
## t = -1.2861, df = 409, p-value = 0.1991
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.15921755  0.03346906
## sample estimates:
##        cor 
## -0.0634657
# very low.... -0.06
cor.test(settlement_data$Place_Attachment_Index, settlement_data$`Household_Marine_Tenure`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$Household_Marine_Tenure
## t = 3.2469, df = 409, p-value = 0.001263
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06275387 0.25139540
## sample estimates:
##       cor 
## 0.1585209
# slightly better .. 0.158
cor.test(settlement_data$Place_Attachment_Index, settlement_data$`School_Enrollment_Rate`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Place_Attachment_Index and settlement_data$School_Enrollment_Rate
## t = -2.7473, df = 409, p-value = 0.006275
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.22836305 -0.03837756
## sample estimates:
##        cor 
## -0.1346072
# still low ... -0.135

# Multi-collinearity tests
cor.test(settlement_data$`Household_Material_Assets`, settlement_data$`Household_Marine_Tenure`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Material_Assets and settlement_data$Household_Marine_Tenure
## t = -1.2265, df = 409, p-value = 0.2207
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1563505  0.0364057
## sample estimates:
##         cor 
## -0.06053677
# pass
cor.test(settlement_data$`Household_Material_Assets`, settlement_data$`School_Enrollment_Rate`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Material_Assets and settlement_data$School_Enrollment_Rate
## t = 4.1855, df = 409, p-value = 3.486e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1080535 0.2936375
## sample estimates:
##       cor 
## 0.2026645
# pass
cor.test(settlement_data$`Household_Marine_Tenure`, settlement_data$`School_Enrollment_Rate`)
## 
##  Pearson's product-moment correlation
## 
## data:  settlement_data$Household_Marine_Tenure and settlement_data$School_Enrollment_Rate
## t = -3.3954, df = 409, p-value = 0.0007523
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.25816849 -0.06996526
## sample estimates:
##       cor 
## -0.165574
# pass 

# ok so no strong correlations, not worried about co-linearity either 
# try to fit model and see what happens 

# Fit regression model
Indonesia_household_model <- lm(Place_Attachment_Index ~ `Household_Material_Assets` +
                                   `Household_Marine_Tenure` + `School_Enrollment_Rate`, 
                                data = settlement_data)

summary(Indonesia_household_model)
## 
## Call:
## lm(formula = Place_Attachment_Index ~ Household_Material_Assets + 
##     Household_Marine_Tenure + School_Enrollment_Rate, data = settlement_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63066 -0.12002 -0.00832  0.09008  0.94731 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.08103    0.09169  44.509  < 2e-16 ***
## Household_Material_Assets -0.07023    0.10312  -0.681  0.49622    
## Household_Marine_Tenure    0.04589    0.01626   2.822  0.00501 ** 
## School_Enrollment_Rate    -0.17568    0.08433  -2.083  0.03784 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.207 on 407 degrees of freedom
## Multiple R-squared:  0.0383, Adjusted R-squared:  0.03121 
## F-statistic: 5.403 on 3 and 407 DF,  p-value: 0.001182
# Evaluate Model Diagnostics
plot(Indonesia_household_model)

# plots look fine, no outliers 

# Communicate Results
# The results of our multiple linear regression reveal that household marine tenure, 
# and school enrollment rate are significant predictors of place attachment in 
# Indonesia (F(407) = 5.403, adjusted R^2 = 0.03121, p = 0.0012). 

# household material assets t = 0.496
# household marine tenure t = 0.005 **
# school enrollment rate t  = 0.0378 *


# This model is weak, let's see what it looks like without household material assets
# Fit regression model
Indonesia_household_model2 <- lm(Place_Attachment_Index ~ `Household_Marine_Tenure`
                                 + `School_Enrollment_Rate`, 
                                data = settlement_data)

summary(Indonesia_household_model2)
## 
## Call:
## lm(formula = Place_Attachment_Index ~ Household_Marine_Tenure + 
##     School_Enrollment_Rate, data = settlement_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63033 -0.12315 -0.00653  0.09096  0.93604 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4.05556    0.08366  48.477  < 2e-16 ***
## Household_Marine_Tenure  0.04620    0.01625   2.844  0.00468 ** 
## School_Enrollment_Rate  -0.18692    0.08264  -2.262  0.02423 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2068 on 408 degrees of freedom
## Multiple R-squared:  0.0372, Adjusted R-squared:  0.03248 
## F-statistic: 7.882 on 2 and 408 DF,  p-value: 0.0004377
# AIC Value
AIC(Indonesia_household_model2)
## [1] -123.964
# Evaluate Model Diagnostics
plot(Indonesia_household_model2)

# plots look fine

# The results of our multiple linear regression reveal that household marine tenure, 
# and school enrollment rate are significant predictors of place attachment in 
# Indonesia (F(408) = 7.882, adjusted R^2 = 0.03248, p = 0.0004). 
# household marine tenure t = 0.00468 **
# school enrollment rate t  = 0.0242 *

# better, but not sure what this tells us. maybe we try a multilevel model. 

Mixed Effects Model - Kayla

# Trying a mixed effects model with different predictor variables. A mix of individual and 
# household variables to try and build a more significant model. 
# Try a multilevel/mixed effects model using vars we know have a correlation
# with place attachment 

# Fit multi-level model with random intercept by plot
place_attachment_mem <- lme(Place_Attachment_Index ~ 
                            School_Enrollment_Rate +
                              Household_Marine_Tenure +
                              YrResident, 
                            random = ~ 1 | MPAName, data = settlement_data2, 
                            na.action=na.exclude)

# Compare AIC Values between all mixed effect models so far
AIC(place_attachment_mem, place_attachment_rlm, place_attachment_household_mlm)
## Warning in AIC.default(place_attachment_mem, place_attachment_rlm,
## place_attachment_household_mlm): models are not all fitted to the same number
## of observations
# mlm has the lowest AIC, with the mem having second lowest

# Plot residuals of multi-level model
plot(place_attachment_mem)

qqnorm(place_attachment_mem)

# Model summary
summary(place_attachment_mem)
## Linear mixed-effects model fit by REML
##   Data: settlement_data2 
##         AIC       BIC   logLik
##   -72.75938 -49.19782 42.37969
## 
## Random effects:
##  Formula: ~1 | MPAName
##         (Intercept)  Residual
## StdDev:  0.06266477 0.2074717
## 
## Fixed effects:  Place_Attachment_Index ~ School_Enrollment_Rate + Household_Marine_Tenure +      YrResident 
##                             Value  Std.Error  DF  t-value p-value
## (Intercept)              4.052218 0.10060027 371 40.28039  0.0000
## School_Enrollment_Rate  -0.243205 0.08936277 371 -2.72154  0.0068
## Household_Marine_Tenure  0.025942 0.01921014 371  1.35042  0.1777
## YrResident               0.003459 0.00135611 371  2.55101  0.0111
##  Correlation: 
##                         (Intr) Sc_E_R Hs_M_T
## School_Enrollment_Rate  -0.760              
## Household_Marine_Tenure -0.567  0.188       
## YrResident              -0.268 -0.121 -0.067
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.85107406 -0.64953050 -0.03015959  0.48822683  4.14707607 
## 
## Number of Observations: 379
## Number of Groups: 5
# Make table with AIC results 

# Create a data frame with model names and AIC values
aic_model1 <- AIC(place_attachment_rlm)
aic_model2 <- AIC(place_attachment_household_mlm)
aic_model3 <- AIC(place_attachment_mem)

aic_table <- data.frame(
  Model = c("Mixed Effect Model to Determine Place Attachment Attempt 1", 
            "Mixed Effect Model to Determine Place Attachment Attempt 2", 
            "Mixed Effect Model to Determine Place Attachment Attempt 3"),
  AIC = c(aic_model1, 
          aic_model2, 
          aic_model3)
)

# Print the table
print(aic_table)
##                                                        Model       AIC
## 1 Mixed Effect Model to Determine Place Attachment Attempt 1 -52.20510
## 2 Mixed Effect Model to Determine Place Attachment Attempt 2 -68.69349
## 3 Mixed Effect Model to Determine Place Attachment Attempt 3 -72.75938
# make pretty using gt
aic_table_gt <- aic_table %>%
  gt() %>%
  tab_header(
    title = "AIC Comparison of Mixed-Effects Models"
  )  %>%
  fmt_number(
    columns = vars(AIC),
    decimals = 2
  ) %>%
  cols_label(
    Model = "Model",
    AIC = "AIC Value"
  ) %>%
  tab_spanner_delim(
    delim = " ",
    columns = everything()
  )
## Warning: Since gt v0.3.0, `columns = vars(...)` has been deprecated.
## • Please use `columns = c(...)` instead.
# Print the table
aic_table_gt
AIC Comparison of Mixed-Effects Models
Model AIC Value
Mixed Effect Model to Determine Place Attachment Attempt 1 −52.21
Mixed Effect Model to Determine Place Attachment Attempt 2 −68.69
Mixed Effect Model to Determine Place Attachment Attempt 3 −72.76
# save the table 
gtsave(aic_table_gt, "aic_table.png")
## file:////var/folders/1k/wfyvwjys5cs9vp3xc7rpf18h0000gn/T//RtmptnazJi/file1ba124b510d4.html screenshot completed

Summary Statistics (Andrew)

# Summary Table
indonesia_summary <- settlement_data %>% 
group_by(MPAName) %>%
  summarise(count = n(),
            mean_place_attachment = mean(Place_Attachment_Index,
                                               na.rm = TRUE),
            median_place_attachment = median(Place_Attachment_Index, na.rm = TRUE),
            variance_place_attachment = var(Place_Attachment_Index, na.rm = TRUE),
            sd_place_attachment = sd(Place_Attachment_Index, na.rm = TRUE)) %>%
  ungroup()

# Rename columns
colnames(indonesia_summary) <- c("MPA Name", "Number of Observations",
                                 "Mean Place Attachment",
                                 "Median Place Attachment",
                                 "Variance Place Attachment",
                                 "SD Place Attachment")

# Export Table
write.csv(indonesia_summary, "Indonesia_Summary_Statistics.csv")

indonesia_gt <- indonesia_summary %>% gt() %>% 
  fmt_number(columns = c(`Mean Place Attachment`,
                         `Median Place Attachment`,
                         `Variance Place Attachment`,
                         `SD Place Attachment`),
             decimals = 2)

indonesia_gt
MPA Name Number of Observations Mean Place Attachment Median Place Attachment Variance Place Attachment SD Place Attachment
Kaimana MPA 65 3.96 3.96 0.03 0.17
Kofiau dan Pulau Boo MPA 32 4.04 4.03 0.01 0.10
Misool Selatan Timur MPA 76 3.99 4.02 0.02 0.16
Selat Dampier MPA 67 3.97 3.95 0.03 0.18
Teluk Cenderawasih NP 119 4.02 4.02 0.03 0.18
Teluk Mayalibit MPA 52 4.14 4.00 0.14 0.37
gtsave(indonesia_gt,
       "indonesia_summary_table.png",
       path = here())
## file:////var/folders/1k/wfyvwjys5cs9vp3xc7rpf18h0000gn/T//RtmptnazJi/file1ba127cd7c74.html screenshot completed
# Create mean of other variables table
indonesia_mean_summary <- settlement_data %>% 
  summarise(mean_household_material_assets = mean(Household_Material_Assets,
                                                  na.rm = TRUE),
            mean_household_marine_tenure = mean(Household_Marine_Tenure,
                                                na.rm = TRUE),
            mean_food_security_index = mean(Food_Security_Index,
                                            na.rm = TRUE),
            mean_place_attachment_index = mean(Place_Attachment_Index,
                                               na.rm = TRUE),
            mean_school_enrollment_rate = mean(School_Enrollment_Rate,
                                               na.rm = TRUE),
            mean_years_residency = mean(YrResident, na.rm = TRUE),
            mean_household_age = mean(Age, na.rm = TRUE),
            mean_gender = mean(Gender, na.rm = TRUE))

# Rename columns
colnames(indonesia_mean_summary) <- c("Household Material Assets",
                                      "Household Marine Tenure",
                                      "Food Security Index",
                                      "Place Attachment Index",
                                      "School Enrollment Rate",
                                      "Years of Residency",
                                      "Age",
                                      "Gender")

indonesia_mean_summary <- indonesia_mean_summary %>% 
  pivot_longer(cols = `Household Material Assets`:Gender,
               names_to = "Variable Name",
               values_to = "Mean")

# Export Table
indonesia_gt2 <- indonesia_mean_summary %>% gt() %>% 
  fmt_number(columns = c(Mean),
             decimals = 2)

indonesia_gt2
Variable Name Mean
Household Material Assets 0.48
Household Marine Tenure 2.40
Food Security Index 3.41
Place Attachment Index 4.01
School Enrollment Rate 0.81
Years of Residency 29.06
Age 44.69
Gender 0.91
gtsave(indonesia_gt2,
       "indonesia_mean_summary_table.png",
       path = here())
## file:////var/folders/1k/wfyvwjys5cs9vp3xc7rpf18h0000gn/T//RtmptnazJi/file1ba123a5b816.html screenshot completed